{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "aad38aba",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import os\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "from sklearn.preprocessing import OrdinalEncoder"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "3b1dfbd5",
   "metadata": {},
   "outputs": [],
   "source": [
    "!python honors.py"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "07fda137",
   "metadata": {},
   "outputs": [],
   "source": [
    "import glob\n",
    "all_files = glob.glob(\"Storing_csv_files_3\" + \"/*.csv\")\n",
    "frame = []\n",
    "for filename in all_files:\n",
    "    df = pd.read_csv(filename)\n",
    "    frame.append(df) \n",
    "original_df = pd.concat(frame)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "d480ea24",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>Summary</th>\n",
       "      <th>Issue id</th>\n",
       "      <th>Status</th>\n",
       "      <th>Assignee</th>\n",
       "      <th>Updated</th>\n",
       "      <th>Last Viewed</th>\n",
       "      <th>Original estimate</th>\n",
       "      <th>Time Spent</th>\n",
       "      <th>Custom field (Start date)</th>\n",
       "      <th>...</th>\n",
       "      <th>Random_Assignee</th>\n",
       "      <th>percent_random</th>\n",
       "      <th>percent_sign</th>\n",
       "      <th>percentage</th>\n",
       "      <th>Due date</th>\n",
       "      <th>Label</th>\n",
       "      <th>percent_random_time_spent</th>\n",
       "      <th>percent_sign_time_spent</th>\n",
       "      <th>percentage_time_spent</th>\n",
       "      <th>Random_time_spent</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>Problem statement</td>\n",
       "      <td>10018</td>\n",
       "      <td>Done</td>\n",
       "      <td>Alexey</td>\n",
       "      <td>3/7/2022 13:12</td>\n",
       "      <td>3/7/2022 13:14</td>\n",
       "      <td>12</td>\n",
       "      <td>12.826985</td>\n",
       "      <td>2021-11-01</td>\n",
       "      <td>...</td>\n",
       "      <td>Angie</td>\n",
       "      <td>1.603110</td>\n",
       "      <td>2</td>\n",
       "      <td>3.206221</td>\n",
       "      <td>2022-03-22</td>\n",
       "      <td>0</td>\n",
       "      <td>0.568915</td>\n",
       "      <td>2</td>\n",
       "      <td>1.137831</td>\n",
       "      <td>12.826985</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>Project Proposal</td>\n",
       "      <td>10019</td>\n",
       "      <td>Done</td>\n",
       "      <td>Angie</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>24</td>\n",
       "      <td>19.502708</td>\n",
       "      <td>2021-11-28</td>\n",
       "      <td>...</td>\n",
       "      <td>Violet</td>\n",
       "      <td>0.043981</td>\n",
       "      <td>-2</td>\n",
       "      <td>-0.087963</td>\n",
       "      <td>2021-12-31</td>\n",
       "      <td>0</td>\n",
       "      <td>1.937838</td>\n",
       "      <td>2</td>\n",
       "      <td>3.875677</td>\n",
       "      <td>19.502708</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>Assign project tasks to the team</td>\n",
       "      <td>10020</td>\n",
       "      <td>Done</td>\n",
       "      <td>Manoj</td>\n",
       "      <td>3/7/2022 13:13</td>\n",
       "      <td>3/7/2022 13:14</td>\n",
       "      <td>2</td>\n",
       "      <td>0.669527</td>\n",
       "      <td>2021-11-30</td>\n",
       "      <td>...</td>\n",
       "      <td>Alexey</td>\n",
       "      <td>0.856437</td>\n",
       "      <td>2</td>\n",
       "      <td>1.712874</td>\n",
       "      <td>2022-03-18</td>\n",
       "      <td>1</td>\n",
       "      <td>0.165237</td>\n",
       "      <td>-2</td>\n",
       "      <td>-0.330473</td>\n",
       "      <td>0.669527</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>Discuss budgeting</td>\n",
       "      <td>10021</td>\n",
       "      <td>Done</td>\n",
       "      <td>Violet</td>\n",
       "      <td>3/7/2022 13:04</td>\n",
       "      <td>3/7/2022 13:14</td>\n",
       "      <td>4</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>2021-12-01</td>\n",
       "      <td>...</td>\n",
       "      <td>Bai Niang</td>\n",
       "      <td>1.492974</td>\n",
       "      <td>2</td>\n",
       "      <td>2.985948</td>\n",
       "      <td>2022-04-25</td>\n",
       "      <td>0</td>\n",
       "      <td>0.559747</td>\n",
       "      <td>2</td>\n",
       "      <td>1.119494</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>Software Requirements Document</td>\n",
       "      <td>10022</td>\n",
       "      <td>Done</td>\n",
       "      <td>Latifah</td>\n",
       "      <td>3/7/2022 13:04</td>\n",
       "      <td>3/7/2022 13:14</td>\n",
       "      <td>48</td>\n",
       "      <td>103.471076</td>\n",
       "      <td>2021-12-02</td>\n",
       "      <td>...</td>\n",
       "      <td>Klaus</td>\n",
       "      <td>1.006903</td>\n",
       "      <td>2</td>\n",
       "      <td>2.013806</td>\n",
       "      <td>2022-03-29</td>\n",
       "      <td>0</td>\n",
       "      <td>1.655647</td>\n",
       "      <td>2</td>\n",
       "      <td>3.311295</td>\n",
       "      <td>103.471076</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>15</td>\n",
       "      <td>View Foreign Exchange account</td>\n",
       "      <td>10033</td>\n",
       "      <td>Done</td>\n",
       "      <td>Mulan</td>\n",
       "      <td>3/7/2022 13:07</td>\n",
       "      <td>3/7/2022 13:14</td>\n",
       "      <td>36</td>\n",
       "      <td>-45.986731</td>\n",
       "      <td>2022-03-13</td>\n",
       "      <td>...</td>\n",
       "      <td>Violet</td>\n",
       "      <td>1.970333</td>\n",
       "      <td>2</td>\n",
       "      <td>3.940666</td>\n",
       "      <td>2022-06-28</td>\n",
       "      <td>0</td>\n",
       "      <td>1.416114</td>\n",
       "      <td>2</td>\n",
       "      <td>2.832228</td>\n",
       "      <td>-45.986731</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>16</td>\n",
       "      <td>Exchange currency tab</td>\n",
       "      <td>10034</td>\n",
       "      <td>Done</td>\n",
       "      <td>Mushu</td>\n",
       "      <td>3/7/2022 12:58</td>\n",
       "      <td>3/7/2022 13:14</td>\n",
       "      <td>48</td>\n",
       "      <td>30.319552</td>\n",
       "      <td>2022-04-04</td>\n",
       "      <td>...</td>\n",
       "      <td>Alexandra</td>\n",
       "      <td>1.698600</td>\n",
       "      <td>2</td>\n",
       "      <td>3.397201</td>\n",
       "      <td>2022-05-18</td>\n",
       "      <td>0</td>\n",
       "      <td>1.015978</td>\n",
       "      <td>2</td>\n",
       "      <td>2.031955</td>\n",
       "      <td>30.319552</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>17</td>\n",
       "      <td>User testing</td>\n",
       "      <td>10035</td>\n",
       "      <td>Done</td>\n",
       "      <td>Shang</td>\n",
       "      <td>3/7/2022 12:56</td>\n",
       "      <td>3/7/2022 13:14</td>\n",
       "      <td>8</td>\n",
       "      <td>-2.237213</td>\n",
       "      <td>2022-04-15</td>\n",
       "      <td>...</td>\n",
       "      <td>Max</td>\n",
       "      <td>1.802324</td>\n",
       "      <td>2</td>\n",
       "      <td>3.604647</td>\n",
       "      <td>2022-07-09</td>\n",
       "      <td>0</td>\n",
       "      <td>0.618607</td>\n",
       "      <td>2</td>\n",
       "      <td>1.237213</td>\n",
       "      <td>-2.237213</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>18</td>\n",
       "      <td>Vendors testing the software for any bugs</td>\n",
       "      <td>10036</td>\n",
       "      <td>Done</td>\n",
       "      <td>Jisoo</td>\n",
       "      <td>3/7/2022 12:56</td>\n",
       "      <td>3/7/2022 13:14</td>\n",
       "      <td>24</td>\n",
       "      <td>43.850126</td>\n",
       "      <td>2022-04-26</td>\n",
       "      <td>...</td>\n",
       "      <td>Lyte</td>\n",
       "      <td>0.590071</td>\n",
       "      <td>2</td>\n",
       "      <td>1.180141</td>\n",
       "      <td>2022-06-23</td>\n",
       "      <td>0</td>\n",
       "      <td>1.692506</td>\n",
       "      <td>2</td>\n",
       "      <td>3.385013</td>\n",
       "      <td>43.850126</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>19</td>\n",
       "      <td>Releasing 2.0</td>\n",
       "      <td>10037</td>\n",
       "      <td>Done</td>\n",
       "      <td>Frankie</td>\n",
       "      <td>3/7/2022 13:06</td>\n",
       "      <td>3/7/2022 13:14</td>\n",
       "      <td>9</td>\n",
       "      <td>9.546230</td>\n",
       "      <td>2022-05-24</td>\n",
       "      <td>...</td>\n",
       "      <td>Shang</td>\n",
       "      <td>1.208761</td>\n",
       "      <td>2</td>\n",
       "      <td>2.417522</td>\n",
       "      <td>2022-08-30</td>\n",
       "      <td>0</td>\n",
       "      <td>0.454623</td>\n",
       "      <td>2</td>\n",
       "      <td>0.909246</td>\n",
       "      <td>9.546230</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>30000 rows × 24 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "    Unnamed: 0                                    Summary  Issue id Status  \\\n",
       "0            0                          Problem statement     10018   Done   \n",
       "1            1                           Project Proposal     10019   Done   \n",
       "2            2           Assign project tasks to the team     10020   Done   \n",
       "3            3                          Discuss budgeting     10021   Done   \n",
       "4            4             Software Requirements Document     10022   Done   \n",
       "..         ...                                        ...       ...    ...   \n",
       "15          15              View Foreign Exchange account     10033   Done   \n",
       "16          16                      Exchange currency tab     10034   Done   \n",
       "17          17                               User testing     10035   Done   \n",
       "18          18  Vendors testing the software for any bugs     10036   Done   \n",
       "19          19                              Releasing 2.0     10037   Done   \n",
       "\n",
       "   Assignee         Updated     Last Viewed  Original estimate  Time Spent  \\\n",
       "0    Alexey  3/7/2022 13:12  3/7/2022 13:14                 12   12.826985   \n",
       "1     Angie             NaN             NaN                 24   19.502708   \n",
       "2     Manoj  3/7/2022 13:13  3/7/2022 13:14                  2    0.669527   \n",
       "3    Violet  3/7/2022 13:04  3/7/2022 13:14                  4    0.000000   \n",
       "4   Latifah  3/7/2022 13:04  3/7/2022 13:14                 48  103.471076   \n",
       "..      ...             ...             ...                ...         ...   \n",
       "15    Mulan  3/7/2022 13:07  3/7/2022 13:14                 36  -45.986731   \n",
       "16    Mushu  3/7/2022 12:58  3/7/2022 13:14                 48   30.319552   \n",
       "17    Shang  3/7/2022 12:56  3/7/2022 13:14                  8   -2.237213   \n",
       "18    Jisoo  3/7/2022 12:56  3/7/2022 13:14                 24   43.850126   \n",
       "19  Frankie  3/7/2022 13:06  3/7/2022 13:14                  9    9.546230   \n",
       "\n",
       "   Custom field (Start date)  ...  Random_Assignee percent_random  \\\n",
       "0                 2021-11-01  ...            Angie       1.603110   \n",
       "1                 2021-11-28  ...           Violet       0.043981   \n",
       "2                 2021-11-30  ...           Alexey       0.856437   \n",
       "3                 2021-12-01  ...        Bai Niang       1.492974   \n",
       "4                 2021-12-02  ...            Klaus       1.006903   \n",
       "..                       ...  ...              ...            ...   \n",
       "15                2022-03-13  ...           Violet       1.970333   \n",
       "16                2022-04-04  ...        Alexandra       1.698600   \n",
       "17                2022-04-15  ...              Max       1.802324   \n",
       "18                2022-04-26  ...             Lyte       0.590071   \n",
       "19                2022-05-24  ...            Shang       1.208761   \n",
       "\n",
       "    percent_sign  percentage    Due date  Label  percent_random_time_spent  \\\n",
       "0              2    3.206221  2022-03-22      0                   0.568915   \n",
       "1             -2   -0.087963  2021-12-31      0                   1.937838   \n",
       "2              2    1.712874  2022-03-18      1                   0.165237   \n",
       "3              2    2.985948  2022-04-25      0                   0.559747   \n",
       "4              2    2.013806  2022-03-29      0                   1.655647   \n",
       "..           ...         ...         ...    ...                        ...   \n",
       "15             2    3.940666  2022-06-28      0                   1.416114   \n",
       "16             2    3.397201  2022-05-18      0                   1.015978   \n",
       "17             2    3.604647  2022-07-09      0                   0.618607   \n",
       "18             2    1.180141  2022-06-23      0                   1.692506   \n",
       "19             2    2.417522  2022-08-30      0                   0.454623   \n",
       "\n",
       "    percent_sign_time_spent percentage_time_spent  Random_time_spent  \n",
       "0                         2              1.137831          12.826985  \n",
       "1                         2              3.875677          19.502708  \n",
       "2                        -2             -0.330473           0.669527  \n",
       "3                         2              1.119494           0.000000  \n",
       "4                         2              3.311295         103.471076  \n",
       "..                      ...                   ...                ...  \n",
       "15                        2              2.832228         -45.986731  \n",
       "16                        2              2.031955          30.319552  \n",
       "17                        2              1.237213          -2.237213  \n",
       "18                        2              3.385013          43.850126  \n",
       "19                        2              0.909246           9.546230  \n",
       "\n",
       "[30000 rows x 24 columns]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "original_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "47d3a709",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   Status Random_Assignee\n",
      "0    Done           Angie\n",
      "1    Done          Violet\n",
      "2    Done          Alexey\n",
      "3    Done       Bai Niang\n",
      "4    Done           Klaus\n",
      "..    ...             ...\n",
      "15   Done          Violet\n",
      "16   Done       Alexandra\n",
      "17   Done             Max\n",
      "18   Done            Lyte\n",
      "19   Done           Shang\n",
      "\n",
      "[30000 rows x 2 columns]\n",
      "[[ 0.  2.]\n",
      " [ 0. 17.]\n",
      " [ 0.  1.]\n",
      " ...\n",
      " [ 0. 13.]\n",
      " [ 0. 11.]\n",
      " [ 0. 16.]]\n"
     ]
    }
   ],
   "source": [
    "original_df['Status'].fillna('Done', inplace = True)\n",
    "transform_df = original_df[['Status','Random_Assignee']]\n",
    "print(transform_df\n",
    "\n",
    "encoder = OrdinalEncoder()\n",
    "encoder.fit(transform_df)\n",
    "final_df = encoder.transform(transform_df)\n",
    "print(final_df)\n",
    "#final_df = pd.DataFrame(final_df, columns = ['Status', 'Label(Binary) Project with all tasks', 'Assignee' ])\n",
    "#final_df = pd.concat([final_df, original_df], axis = 1, ignore_indexing = True)\n",
    "#final_df\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "70d60153",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "False"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "original_df['diff_dates'].isnull().any().any()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "cfc9b0ad",
   "metadata": {},
   "outputs": [],
   "source": [
    "arr = original_df[['diff_dates','percent_random', 'Original estimate', 'diff_time_spent', 'Time Spent','Label']].to_numpy()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "de143819",
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset = np.hstack((final_df,arr))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "5a99eee1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[  0.        ,   2.        ,  27.        , ...,  -0.82698452,\n",
       "         12.82698452,   0.        ],\n",
       "       [  0.        ,  17.        ,  17.        , ...,   4.49729229,\n",
       "         19.50270771,   0.        ],\n",
       "       [  0.        ,   1.        ,  29.        , ...,   1.33047328,\n",
       "          0.66952672,   1.        ],\n",
       "       ...,\n",
       "       [  0.        ,  13.        ,  15.        , ...,  10.23721311,\n",
       "         -2.23721311,   0.        ],\n",
       "       [  0.        ,  11.        ,  18.        , ..., -19.85012556,\n",
       "         43.85012556,   0.        ],\n",
       "       [  0.        ,  16.        ,  22.        , ...,  -0.54622988,\n",
       "          9.54622988,   0.        ]])"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "1f03a6bc",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(21000, 6)"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "X_train, X_test, y_train, y_test = train_test_split(dataset[:,0:6], dataset[:,-1], test_size=0.3, random_state=32)\n",
    "X_train.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "134dfe2e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9905555555555555"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.neighbors import KNeighborsClassifier\n",
    "neigh = KNeighborsClassifier(n_neighbors= 2 )\n",
    "neigh.fit(X_train, y_train)\n",
    "y_testing_prediction = neigh.predict(X_test)\n",
    "from sklearn.metrics import accuracy_score\n",
    "accuracy_score(y_test, y_testing_prediction)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "94d152f0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9912222222222222"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "neigh = KNeighborsClassifier(n_neighbors= 3 )\n",
    "neigh.fit(X_train, y_train)\n",
    "y_testing_prediction = neigh.predict(X_test)\n",
    "from sklearn.metrics import accuracy_score\n",
    "accuracy_score(y_test, y_testing_prediction)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "4b15697b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9884444444444445"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# We are trying to rerun the python script because our data has to be randomized. \n",
    "\n",
    "neigh = KNeighborsClassifier(n_neighbors= 10)\n",
    "neigh.fit(X_train, y_train)\n",
    "y_testing_prediction = neigh.predict(X_test)\n",
    "from sklearn.metrics import accuracy_score\n",
    "accuracy_score(y_test, y_testing_prediction)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "b50096b7",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9833333333333333"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "neigh = KNeighborsClassifier(n_neighbors= 20)\n",
    "neigh.fit(X_train, y_train)\n",
    "y_testing_prediction = neigh.predict(X_test)\n",
    "from sklearn.metrics import accuracy_score\n",
    "accuracy_score(y_test, y_testing_prediction)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "70069c46",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9444444444444444"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "neigh = KNeighborsClassifier(n_neighbors= 125)\n",
    "neigh.fit(X_train, y_train)\n",
    "y_testing_prediction = neigh.predict(X_test)\n",
    "from sklearn.metrics import accuracy_score\n",
    "accuracy_score(y_test, y_testing_prediction)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "5135d04e",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.neighbors import KNeighborsClassifier\n",
    "neigh = KNeighborsClassifier(n_neighbors= 2 )\n",
    "neigh.fit(X_train, y_train)\n",
    "y_testing_prediction = neigh.predict(X_test)\n",
    "from sklearn.metrics import accuracy_score\n",
    "accuracy_score(y_test, y_testing_prediction)\n",
    "y_true=y_test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "97dbd416",
   "metadata": {},
   "outputs": [],
   "source": [
    "#confusion matrix\n",
    "from sklearn.metrics import confusion_matrix\n",
    "cm= confusion_matrix(y_true, y_testing_prediction)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "99e9a588",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAUkAAAE/CAYAAADL8TF0AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAAffElEQVR4nO3dfbxVZZ338c/Xg8pBMyUBEbjVHMoBSw1FHM1QMLFMnCYMTEWjmxmG8eGepoK5p5lqorzve55y7tHiVSY+JJEPgY6oiJJWKDJGKT7E8QmQIyA+RJrA4fzmj31J2+M513lo7bPP2XzfvtZrr/1b61rr2i9e5+d1retaaykiMDOz1u1R7QqYmfVkTpJmZhlOkmZmGU6SZmYZTpJmZhlOkmZmGU6SZtbjSfpfklZLekzSjZL6SuovaYmkNenzgLL9Z0tqkPSUpNPL4qMkPZq2XSFJ7Z67R8+TlHpw5cxqWES7yaM1O156ptN/s3se+N7suSQNAX4KjIiI30laANwBjABejojLJc0CDoiIL0kaAdwIjAYOBu4B3hcROyWtAC4FHkzHuCIiFufO36ezP6i77dj8dLWrYF2w54DD6bPnwdWuhnVRU1cLNu8sshrl+gD1knYA/YANwGxgbNo+D1gGfAmYCMyPiG3As5IagNGSngP2i4jlAJKuBc4GsknS3W0zK040d35p75ARLwD/BKwFGoHXIuJuYFBENKZ9GoGBqcgQYF3ZIdan2JC03jKe5SRpZsVpbu70Imm6pJVly/TyQ6ZrjROBwyh1n/eRdF6mFq113yMTz+rx3W0z6z2iAy3Dd5aJucDczC7jgWcjYjOApFuAPwE2ShocEY2SBgOb0v7rgWFl5YdS6p6vT+st41luSZpZcbrQkuyAtcAYSf3SaPQ44AlgETA17TMVWJjWFwGTJe0t6TBgOLAidcm3ShqTjnNBWZk2uSVpZsXpQkuy3UNGPCTpJuARSmNKv6DU8twXWCBpGqVEOintvzqNgD+e9p8ZEW+NKM0ArgHqKQ3YZAdtoBdMAfLodu/k0e3erWn7C12aArR93S87nVD2GnZUl87VXdySNLPiVG4KUNU4SZpZcSrQ3a42J0kzK07HBmJ6FSdJMytMV6YA9XROkmZWHLckzcwy3JI0M8vw6LaZWYZbkmZmGb4maWaW4ZakmVmGW5JmZm37/XMkaoeTpJkVx91tM7MMd7fNzDLckjQzy6jByeR+fYOZWYZbkmZWHHe3zcwyPHBjZpbhlqSZWYZbkmZmGU6SZmZt822JZmY5bkmamWXU4MCNJ5ObWXGamzu/tEPS+yWtKlt+I+kySf0lLZG0Jn0eUFZmtqQGSU9JOr0sPkrSo2nbFZLU3vmdJM2sONHc+aW9Q0Y8FRFHR8TRwCjgDeBWYBawNCKGA0vTdySNACYDI4EJwJWS6tLhrgKmA8PTMqG98ztJmllxKtCSbGEc8HREPA9MBOal+Dzg7LQ+EZgfEdsi4lmgARgtaTCwX0Qsj4gAri0r0yYnSTMrThdakpKmS1pZtkzPnGEycGNaHxQRjQDpc2CKDwHWlZVZn2JD0nrLeJYHbsysOF0Y3Y6IucDc9vaTtBdwFjC7vV1bO00mnuUkaWbFqewUoDOARyJiY/q+UdLgiGhMXelNKb4eGFZWbiiwIcWHthLPcnfbzIpTgYGbMlP4fVcbYBEwNa1PBRaWxSdL2lvSYZQGaFakLvlWSWPSqPYFZWXa5JakmRWnQi1JSf2A04A/LwtfDiyQNA1YC0wCiIjVkhYAjwNNwMz4/a1AM4BrgHpgcVqynCTNrMeLiDeA97SIbaE02t3a/nOAOa3EVwJHdubcTpJmVpwavOPGSdLMiuN7t83MMtySNDPLcEvSzCzDSdLMLCPavYGl13GSNLPiuCVpZpbhJGlmluHRbTOzDLckzcwyPHBjZpbhlqSZWYaTpJlZhgduzMzaFs2+Jmlm1jZ3t83MMmqwu+133JiZZbglaWbF8TVJM7MMX5M0M8twkjQzy/BtiZZz7fxbufm2O5HE8MMP5et/+9e8uW0bn//yN9nw4kYOPmgQ//yPs3n3fu/i9rvu5fs/uHlX2V8//Sw/uvrfGTZkMBf85Rd2xTdufokzP3oKsy77i2r8JGtFw68fZOtvf8vOnc00NTUx5oSPVbtKPYdbktaWjZtf4oabFrLwhu/Qd++9+fyXv8Hie37C08+tZcyxR/O588/hu9ct4HvXL+Cv/3IaZ55+KmeefipQSpCXzPoaR7zvcABunvcfu457zmcvZvzYE6vym6xt40+bxJYtr1S7Gj1PDQ7ceApQgZp27mTbtu00Ne3kd29uY8CB/bnvgeVMPGM8ABPPGM+99y9/R7k7lvyEM8Z/5B3x59e9wJZXXmXUUZ16l7pZ9URz55cermJJUtIRkr4k6QpJ30rrf1yp81XboAEHcuGUP2P8Jy/glInn8q59+nHi8aPY8sqrDDiwPwADDuzPy6++9o6ydy79CR87bew74ncsWcaEcScjqdLVt06ICBbfcSMPPbiYz037TLWr07M0R+eXDpC0v6SbJD0p6QlJJ0jqL2mJpDXp84Cy/WdLapD0lKTTy+KjJD2atl2hDvxxVSRJSvoSMB8QsAJ4OK3fKGlWO2WnS1opaeXcSlSuQl77zVbue+BB7vrR97l34Q387s1t3HbXve2W+9XqJ6nv25fh7z30HdsWL/0JHxs/tvjK2h/k5LFnM/r4CZz5ifOYMeNCPnzS8dWuUo8Rzc2dXjroW8CdEXEEcBTwBDALWBoRw4Gl6TuSRgCTgZHABOBKSXXpOFcB04HhaZnQ3okrdU1yGjAyInaUByX9C7AauLytghExF5ibCrz9AD3YgytXMeTgQfQ/YH8Axn3kT1j16OO854D92fzSyww4sD+bX3qZ/vu/+23lFt/Telf7yTXPsHNnMyOPGN4d1bdOaGzcCMDmzVtYuHAxxx13NA/89KEq16qHqMA1SUn7AScDFwJExHZgu6SJwNi02zxgGfAlYCIwPyK2Ac9KagBGS3oO2C8ilqfjXgucDSzOnb9S3e1m4OBW4oPTtpozeNAAfvXYk/zuzTeJCB5auYr3HjKMsSeNYeHiewBYuPgeTvnwCbvKNDc3c/d9D7SaJBffs6zVuFVXv3717LvvPrvWTxv/EVavfqrKtepBunBNsrz3mJbpLY76XmAz8H1Jv5D0XUn7AIMiohEgfQ5M+w8B1pWVX59iQ9J6y3hWpVqSlwFLJa3h95X9H8AfAX9VoXNW1QdHHsFpp5zEORddTF1dHUe873AmTTyDN373Jp//8je45fa7GDxoAP/y9f+9q8zKVY8xaMCBDBsy+B3Hu+veB7jyn77WnT/BOmDQoAHc9KPvAdCnTx3z5/+Yu+5eVt1K9SRdaEm+rffYuj7Ah4CLI+IhSd8ida3b0Np1xsjEsxQVmvwpaQ9gNKVMLUpZ++GI2NmJg8SOzU9XpH5WWXsOOJw+e7bWmbDeoGn7C10aLXz9K1M6nVD2+cqN2XNJOgh4MCIOTd8/TClJ/hEwNiIaJQ0GlkXE+yXNBoiIb6b97wK+AjwH3JeuayJpSir/57nzV2x0OyKaI+LBiLg5Im5K6x1PkGbW+1RgdDsiXgTWSXp/Co0DHgcWAVNTbCqwMK0vAiZL2lvSYZQGaFakLvlWSWPSqPYFZWXa5MnkZlacys17vBi4QdJewDPARZQaeQskTQPWApMAImK1pAWUEmkTMLOsgTYDuAaopzRgkx20ASdJMytShe64iYhVwLGtbBrXxv5zgDmtxFcCnbo7w3fcmJlluCVpZoXpxOTwXsNJ0syKU4MPuHCSNLPiOEmamWX0gqf6dJaTpJkVxy1JM7O2hZOkmVmGk6SZWYanAJmZZbglaWaW4SRpZta2Sj16sZqcJM2sOG5JmpllOEmambXN8yTNzHKcJM3MMmpvmqQfumtmluOWpJkVxtckzcxynCTNzDJq8Jqkk6SZFcbdbTOzHLckzczaVostSU8BMrPiNHdh6QBJz0l6VNIqSStTrL+kJZLWpM8DyvafLalB0lOSTi+Lj0rHaZB0hSS1d24nSTMrTDR3fumEUyLi6Ig4Nn2fBSyNiOHA0vQdSSOAycBIYAJwpaS6VOYqYDowPC0T2jupk6SZFadCLck2TATmpfV5wNll8fkRsS0ingUagNGSBgP7RcTyKD3T7dqyMm1ykjSzwlSwJRnA3ZL+S9L0FBsUEY0A6XNgig8B1pWVXZ9iQ9J6y3iWB27MrDhdaBmmpDe9LDQ3Iua22O3EiNggaSCwRNKTuUO2EotMPMtJ0swK08lrjKUypYTYMim23GdD+twk6VZgNLBR0uCIaExd6U1p9/XAsLLiQ4ENKT60lXiWu9tmVphKdLcl7SPpXW+tAx8FHgMWAVPTblOBhWl9ETBZ0t6SDqM0QLMidcm3ShqTRrUvKCvTJrckzawwXWlJdsAg4NY0W6cP8IOIuFPSw8ACSdOAtcAkgIhYLWkB8DjQBMyMiJ3pWDOAa4B6YHFastSjX9wjxY7NT1e7FtYFew44nD57HlztalgXNW1/od35g63ZOHZspxPKoGXLunSu7uKWpJkVpkItyaryNUkzswy3JM2sMNHco3vOXeIkaWaFqcXutpOkmRUmwi1JM7M2uSVpZpbha5JmZhk9edp1VzlJmllh3JI0M8twkjQzy3B328wswy1JM7MMz5M0M8uoxXmS7T7gQtIgSd+TtDh9H5Ge32Zm9jbNoU4vPV1HngJ0DXAX8NbDAX8NXFah+phZLxahTi89XUeS5IERsYD0ip+IaAJ25ouY2e4omtXppafryDXJ1yW9h/RWMUljgNcqWisz65V21ylAf03pxTqHS/oZMAD4VEVrZWa9Um9oGXZWu0kyIh6R9BHg/ZTeW/tUROyoeM3MzHqAdpOkpAtahD4kiYi4tkJ1MrNeqjeMVndWR7rbx5Wt9wXGAY8ATpJm9ja9YbS6szrS3b64/LukdwPXVaxGZtZr7a4DNy29AQwvuiJm1vvtlt1tSbeRpv9Qmlc5AlhQyUqZWe+0W3a3gX8qW28Cno+I9RWqj5n1YpXsbkuqA1YCL0TEmZL6Az8EDgWeA86JiFfSvrOBaZRufLkkIu5K8VGU7iKsB+4ALo3I1zp7x02q1Jcj4idp+ZkTpJm1pcL3bl8KPFH2fRawNCKGA0vTdySNACYDI4EJwJUplwFcBUyndMlweNqelW1JRsROSW9IendEVOUumz0HHF6N01oBmnZsqHYVrJtVqrstaSjwcWAOpRtcACYCY9P6PGAZ8KUUnx8R24BnJTUAoyU9B+wXEcvTMa8FzgYW587dke72m8CjkpYAr78VjIhLOlD2D7bXXkO64zRWsO3bX+CIAcdWuxrWRU92sVxXBm4kTafUunvL3IiY22K3fwO+CLyrLDYoIhoBIqJR0sAUHwI8WLbf+hTbkdZbxrM6kiT/My3lanCg38z+UF1JDCkhtkyKu0g6E9gUEf8laWwHDtlapo5MPKsjSXL/iPjW22ogXdqBcma2m6nQFKATgbMkfYzSDS37Sboe2ChpcGpFDgY2pf3XA8PKyg8FNqT40FbiWR15VNrUVmIXdqCcme1mKvE8yYiYHRFDI+JQSgMy90bEeZQevPNWfpoKLEzri4DJkvaWdBilAZoVqWu+VdIYSQIuKCvTpjZbkpKmAOcCh0laVLbpXcCWdn+Zme12uvntDZcDC9KbEtYCkwAiYrWkBcDjlKYtzoyIt56BO4PfTwFaTDuDNpDvbv8caAQOBP65LL4V+FVnfomZ7R6i1ct+BR4/YhmlUWwiYgulZ0m0tt8cSiPhLeMrgSM7c842k2REPA88D5yQO4Ck5RGR3cfMdg/NNTikW8TbEvsWcAwzqwHNFW5JVkNHBm7aU4P/7zAzK/F7t82sMJW+JlkNHXnv9l9JOiC3S4H1MbNerLkLS0/Xke72QcDDkhZImpDmF5U7vwL1MrNeKFCnl56u3SQZEX9HaTLm9yhNIl8j6RuSDk/bH6toDc2s19hdW5Kk5629mJYm4ADgJkn/t4J1M7NephaTZEeeTH4JpVt+XgK+C3whInZI2gNYQ+nJHGZmvaL73FkdGd0+EPhkmly+S0Q0p6dzmJkB0Fx7ObJDb0v8+8y2J9raZma7n1qcTO55kmZWmFq8s8RJ0swK0xsGYjrLSdLMCtP8jmnUvZ+TpJkVxt1tM7MMd7fNzDJ2yylAZmYd5SlAZmYZtXhNsoiH7pqZ1Sy3JM2sML4maWaW4dFtM7OMWrwm6SRpZoVxd9vMLKMWu9se3TazwlTiyeSS+kpaIemXklZL+mqK95e0RNKa9HlAWZnZkhokPSXp9LL4KEmPpm1XtPLOrndwkjSzwoQ6v3TANuDUiDgKOBqYIGkMMAtYGhHDgaXpO5JGAJOBkcAE4EpJdelYVwHTKb23a3januUkaWaFqURLMkp+m77umZYAJgLzUnwecHZanwjMj4htEfEs0ACMljQY2C8ilqf3dl1bVqZNTpJmVpiuJElJ0yWtLFumtzyupDpJq4BNwJKIeAgYFBGNAOlzYNp9CLCurPj6FBuS1lvGszxwY2aF6coUoIiYC8xtZ5+dwNGS9gdulXRkZvfWOvGRiWc5SZpZYSo9BSgiXpW0jNK1xI2SBkdEY+pKb0q7rQeGlRUbCmxI8aGtxLPc3TazwlRodHtAakEiqR4YDzwJLKL0umvS58K0vgiYLGlvSYdRGqBZkbrkWyWNSaPaF5SVaZNbkmZWmArNkxwMzEsj1HsACyLidknLgQWSpgFrgUkAEbFa0gLgcaAJmJm66wAzgGuAemBxWrKcJM2sMJW4LTEifgUc00p8CzCujTJzgDmtxFcCueuZ7+AkaWaF8W2JZmYZvi3RzGw345akmRXGj0ozM8torsE06SRpZoWpxWuSTpJmVpjaa0c6SZpZgdySNDPL8DxJM7MMD9yYmWXUXop0kjSzAvmapJlZhrvbZmYZtZcinSTNrEDubpuZZbi7bWaWUXsp0knSzArk7raZWUbUYFvSD901M8twS9LMCuPutplZhke3rcsuueRzfPaiKUQEjz32JJ/7n5+nX796brjhSg45ZBjPP7+Oc8+dwauvvlbtqhow9c+n8KnPnE1EsOaJBmZf+jX+4rLPMu6Mk2luDl5+6WVmX/xVNm18iT596vj6v/4dIz5wBHV96li44A7mXnFNtX9CVdReivQ1yW5x8MEHMXPmZxlzwsc55kPjqaur45xzzuKLX5jJfff+jJEjP8x99/6ML35hZrWrasDAgwZw/uc+zac+egFnfWQye9TtwcfP/ijf+4/rmDj2XP701M+w7O6f8pd/8zkAJpw1nj332ouzxk7hz047n09f8KcMGTa4yr+iOpqJTi89nZNkN+lT14f6+r7U1dVR36+exsaNfOITH+W6638EwHXX/4izzjq9yrW0t9T16UPfvnuX/r3q+7Jp42Ze/+3ru7bX96snovQHHhH061dPXV0dffv2ZceOHfx26+ttHbqmNXdhaY+kYZLuk/SEpNWSLk3x/pKWSFqTPg8oKzNbUoOkpySdXhYfJenRtO0KSe0+AdNJshts2PAi//pv3+HphodY+/wj/Oa1rdxzz/0MHHggL764CYAXX9zEgAHvqXJNDWDTi5u5+srrufcXt/HAo4vZuvV1frbsIQAumz2D+35xO2f+2QSu+D/fAeCu25byxhu/44FHF3PvI7dx9ZU38Nqrv6nmT6ia6MJ/HdAEfD4i/hgYA8yUNAKYBSyNiOHA0vSdtG0yMBKYAFwpqS4d6ypgOjA8LRPaO7mTZDfYf/9384kzP8r73n8Chxw6in32qefcKZ+sdrWsDfu9+12Mm3Ay44+dyMkfPIP6fn35xKfOAODfvnkVpxxzJrfffCfnTTsHgA98aCTNzc2c/MEzGH/cRC6a8RmGHjKkmj+hairRkoyIxoh4JK1vBZ4AhgATgXlpt3nA2Wl9IjA/IrZFxLNAAzBa0mBgv4hYHqVuwLVlZdpUlSQp6aLMtumSVkpaObc7K1VB4049ieeeW8dLL71MU1MTP/7xYsacMIpNm17ioIMGAnDQQQPZvHlLlWtqACecPJr1azfwypZXaWrayZL/vI9jjvvg2/a5/ZY7Oe3jpwJw5icn8MC9P6epaScvv/QKj6z4JUce9cfVqHrVVagluYukQ4FjgIeAQRHRCKVECgxMuw0B1pUVW59iQ9J6y3hWtVqSX21rQ0TMjYhjI+LY6d1Zowpau24Dxx9/DPX1fQE45ZSTePLJBm67fQnnnzcJgPPPm8Rtt91dzWpa0vjCixw16gP0rd8bgBM+fBzP/PpZDjls2K59Tj39ZJ5teG7X/mNOOg6A+n59OWrUkTyTtu1uutKSLG8YpaXVP31J+wI3A5dFRO56RmvXGSMTz6rYFCBJv2prEzCoUuftiR5++BfccssdrHjoTpqamli1ajXf/e4N7LtvP37wg29z4UWTWbfuBaZM+YtqV9WAXz2ymrtvX8ot91xPU9NOnnjsKX543a3887e/zqGHH0JEMxvWvcg/fOGbAPzg6h/xjW/9Pbfd/0MkuGX+bfz68YYq/4rqaI7Oj1ZHxFwg23GUtCelBHlDRNySwhslDY6IxtSV3pTi64FhZcWHAhtSfGgr8SxFF35UR0jaCJwOvNJyE/DziDi4AweJvfbaPa/t9Hbbt7/AEQOOrXY1rIue3PRwl957eN4hn+x0Qrn++Vuy50oj0POAlyPisrL4/wO2RMTlkmYB/SPii5JGAj8ARgMHUxrUGR4ROyU9DFxMqbt+B/DvEXFH7vyVnEx+O7BvRKxquUHSsgqe18yqpELzHk8EzgcelbQqxf4WuBxYIGkasBaYBBARqyUtAB6nNDI+MyJ2pnIzgGuAemBxWrIqliQjYlpm27mVOq+ZVU8lngIUET+l9euJAOPaKDMHmNNKfCVwZGfO79sSzawwfsCFmVlGb7jNsLOcJM2sMLX40F0nSTMrTC12t31boplZhluSZlaYSs27riYnSTMrjAduzMwyavGapJOkmRXGo9tmZhnubpuZZXjgxswsw9ckzcwyfE3SzCzD1yTNzDJ8TdLMLMMtSTOzDF+TNDPL6MqLwHo6J0kzK0ztpUgnSTMrkK9Jmpll1GKS9EN3zcwy3JI0s8J4nqSZWUYtdredJM2sMLU4T9LXJM2sMBHR6aU9kq6WtEnSY2Wx/pKWSFqTPg8o2zZbUoOkpySdXhYfJenRtO0KSerIb3KSNLPCNBOdXjrgGmBCi9gsYGlEDAeWpu9IGgFMBkamMldKqktlrgKmA8PT0vKYrXKSNLPCVKIlGRH3Ay+3CE8E5qX1ecDZZfH5EbEtIp4FGoDRkgYD+0XE8iid9NqyMllOkmZWmK60JCVNl7SybJnegVMNiohGgPQ5MMWHAOvK9lufYkPSest4uzxwY2aF6crATUTMBeYWVIXWrjNGJt4uJ0kzK0w3PuBio6TBEdGYutKbUnw9MKxsv6HAhhQf2kq8Xe5um1lhogv/ddEiYGpanwosLItPlrS3pMMoDdCsSF3yrZLGpFHtC8rKZLklaWaFqURLUtKNwFjgQEnrgX8ALgcWSJoGrAUmAUTEakkLgMeBJmBmROxMh5pBaaS8HliclnY5SZpZYSoxmTwiprSxaVwb+88B5rQSXwkc2dnzO0maWWH80F0zs4xavC3RSdLMCuOWpJlZRi22JD0FyMwswy1JMytMRHO1q1A4J0kzK4wfumtmluHXN5iZZbglaWaW4ZakmVmG50mamWXU4jxJJ0kzK4y722ZmGR64MTPLcEvSzCzDAzdmZhluSZqZZfiapJlZhluSZmYZviZpZpZRi5PJ/dBdM7MMtyTNrDDubpuZZXjgxswsoxavSTpJmllh3JI0M8twkjQzy6i9FAmqxczfG0iaHhFzq10P6xr/++0+PE+yeqZXuwL2B/G/327CSdLMLMNJ0swsw0myenw9q3fzv99uwgM3ZmYZbkmamWU4SVaBpAmSnpLUIGlWtetjHSfpakmbJD1W7bpY93CS7GaS6oD/AM4ARgBTJI2obq2sE64BJlS7EtZ9nCS732igISKeiYjtwHxgYpXrZB0UEfcDL1e7HtZ9nCS73xBgXdn39SlmZj2Qk2T3UysxTzEw66GcJLvfemBY2fehwIYq1cXM2uEk2f0eBoZLOkzSXsBkYFGV62RmbXCS7GYR0QT8FXAX8ASwICJWV7dW1lGSbgSWA++XtF7StGrXySrLd9yYmWW4JWlmluEkaWaW4SRpZpbhJGlmluEkaWaW4SRpZpbhJGk9iqQLJf3/atfD7C1OktYt0iPizHodJ0lrlaR/lHRp2fc5ki5pZb+xku6XdKukxyV9W9IeadtvJX1N0kPACZLOk7RC0ipJ33krcUq6SNKvJf0EOLG7fqNZRzhJWlu+B0wFSElvMnBDG/uOBj4PfAA4HPhkiu8DPBYRxwNbgE8DJ0bE0cBO4DOSBgNfpZQcT6P0IGKzHqNPtStgPVNEPCdpi6RjgEHALyJiSxu7r4iIZ2DXvc0nATdRSoQ3p33GAaOAhyUB1AObgOOBZRGxOZX/IfC+yvwqs85zkrSc7wIXAgcBV2f2a/kAgLe+vxkRO9O6gHkRMbt8R0lnt1LerMdwd9tybqX0PpfjKD21qC2j06Pf9qDUpf5pK/ssBT4laSCApP6SDgEeAsZKeo+kPYFJhf4Csz+QW5LWpojYLuk+4NWyFmFrlgOXU7omeT+l5NryWI9L+jvg7pRMdwAzI+JBSV9Jx2gEHgE8Em49hh+VZm1KyewRYFJErGljn7HA30TEmd1YNbNu4+62tSq95rYBWNpWgjTbHbglaR0i6QPAdS3C29L0HrOa5SRpZpbh7raZWYaTpJlZhpOkmVmGk6SZWYaTpJlZxn8DBSzpLbh9jFgAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 360x360 with 2 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "f, ax =plt.subplots(figsize = (5,5))\n",
    "sns.heatmap(cm,annot = True, linewidths= 0.5, linecolor=\"red\", fmt=\".0f\", ax=ax)\n",
    "plt.xlabel(\"y_pred\")\n",
    "plt.ylabel(\"y_true\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "id": "5c53dd6d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "K =  2 98.87777777777778\n",
      "K =  3 98.97777777777777\n",
      "K =  4 99.01111111111112\n",
      "K =  5 98.97777777777777\n",
      "K =  6 99.08888888888889\n",
      "K =  7 98.86666666666667\n",
      "K =  8 99.08888888888889\n",
      "K =  9 98.87777777777778\n",
      "K =  10 98.9888888888889\n",
      "K =  11 98.84444444444445\n",
      "K =  12 98.96666666666667\n",
      "K =  13 98.78888888888889\n",
      "K =  14 98.9\n",
      "K =  15 98.77777777777777\n",
      "K =  16 98.83333333333333\n",
      "K =  17 98.71111111111112\n",
      "K =  18 98.77777777777777\n",
      "K =  19 98.61111111111111\n",
      "K =  20 98.65555555555555\n",
      "K =  21 98.52222222222223\n",
      "K =  22 98.55555555555556\n",
      "K =  23 98.4\n",
      "K =  24 98.43333333333332\n",
      "K =  25 98.24444444444444\n",
      "K =  26 98.34444444444445\n",
      "K =  27 98.1888888888889\n",
      "K =  28 98.26666666666667\n",
      "K =  29 98.1\n",
      "K =  30 98.15555555555555\n",
      "K =  31 97.96666666666667\n",
      "K =  32 98.0\n",
      "K =  33 97.85555555555555\n",
      "K =  34 97.87777777777778\n",
      "K =  35 97.66666666666667\n",
      "K =  36 97.73333333333333\n",
      "K =  37 97.62222222222222\n",
      "K =  38 97.64444444444445\n",
      "K =  39 97.53333333333333\n",
      "K =  40 97.56666666666666\n",
      "K =  41 97.34444444444445\n",
      "K =  42 97.41111111111111\n",
      "K =  43 97.28888888888889\n",
      "K =  44 97.3\n",
      "K =  45 97.21111111111111\n",
      "K =  46 97.21111111111111\n",
      "K =  47 97.08888888888889\n",
      "K =  48 97.1\n",
      "K =  49 97.06666666666666\n",
      "K =  50 97.1\n",
      "K =  51 96.96666666666667\n",
      "K =  52 96.9888888888889\n",
      "K =  53 96.92222222222222\n",
      "K =  54 96.95555555555555\n",
      "K =  55 96.85555555555555\n",
      "K =  56 96.86666666666667\n",
      "K =  57 96.8\n",
      "K =  58 96.8\n",
      "K =  59 96.77777777777777\n",
      "K =  60 96.78888888888889\n",
      "K =  61 96.7\n",
      "K =  62 96.72222222222221\n",
      "K =  63 96.65555555555555\n",
      "K =  64 96.65555555555555\n",
      "K =  65 96.6\n",
      "K =  66 96.6\n",
      "K =  67 96.54444444444444\n",
      "K =  68 96.56666666666666\n",
      "K =  69 96.52222222222223\n",
      "K =  70 96.57777777777777\n",
      "K =  71 96.38888888888889\n",
      "K =  72 96.44444444444444\n",
      "K =  73 96.34444444444445\n",
      "K =  74 96.35555555555555\n",
      "K =  75 96.21111111111111\n",
      "K =  76 96.23333333333333\n",
      "K =  77 96.2\n",
      "K =  78 96.25555555555556\n",
      "K =  79 96.21111111111111\n",
      "K =  80 96.25555555555556\n",
      "K =  81 96.21111111111111\n",
      "K =  82 96.24444444444444\n",
      "K =  83 96.14444444444445\n",
      "K =  84 96.2\n",
      "K =  85 96.14444444444445\n",
      "K =  86 96.16666666666667\n",
      "K =  87 96.02222222222223\n",
      "K =  88 96.03333333333333\n",
      "K =  89 96.0\n",
      "K =  90 96.01111111111112\n",
      "K =  91 95.95555555555556\n",
      "K =  92 95.9888888888889\n",
      "K =  93 95.91111111111111\n",
      "K =  94 95.93333333333334\n",
      "K =  95 95.83333333333334\n",
      "K =  96 95.85555555555555\n",
      "K =  97 95.84444444444445\n",
      "K =  98 95.86666666666666\n",
      "K =  99 95.86666666666666\n",
      "K =  100 95.86666666666666\n",
      "96.29288888888887\n",
      "              precision    recall  f1-score   support\n",
      "\n",
      "         0.0       0.96      1.00      0.98      8133\n",
      "         1.0       1.00      0.57      0.73       867\n",
      "\n",
      "    accuracy                           0.96      9000\n",
      "   macro avg       0.98      0.79      0.85      9000\n",
      "weighted avg       0.96      0.96      0.95      9000\n",
      "\n"
     ]
    }
   ],
   "source": [
    "sum = 0\n",
    "\n",
    "for x in range (1, 100):\n",
    "    neigh = KNeighborsClassifier(n_neighbors= x)\n",
    "    neigh.fit(X_train, y_train)\n",
    "    y_testing_prediction = neigh.predict(X_test)\n",
    "    x = x + 1\n",
    "    from sklearn.metrics import accuracy_score\n",
    "    sum = sum + accuracy_score(y_test, y_testing_prediction)*100\n",
    "    print(\"K = \",x,accuracy_score(y_test, y_testing_prediction)*100)\n",
    "    avg = sum/x\n",
    "print(avg)\n",
    "# Print out classification report and confusion matrix\n",
    "from sklearn.metrics import classification_report, confusion_matrix\n",
    "print(classification_report(y_test, y_testing_prediction))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "982c708e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(24000, 6)"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "X_train, X_test, y_train, y_test = train_test_split(dataset[:,0:6], dataset[:,-1], test_size=0.2, random_state=32)\n",
    "X_train.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "94b2fbbe",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "K =  2 98.88333333333334\n",
      "K =  3 98.98333333333333\n",
      "K =  4 99.06666666666666\n",
      "K =  5 98.8\n",
      "K =  6 98.91666666666666\n",
      "K =  7 98.8\n",
      "K =  8 98.95\n",
      "K =  9 98.66666666666667\n",
      "K =  10 98.8\n",
      "K =  11 98.61666666666666\n",
      "K =  12 98.8\n",
      "K =  13 98.56666666666666\n",
      "K =  14 98.65\n",
      "K =  15 98.51666666666667\n",
      "K =  16 98.61666666666666\n",
      "K =  17 98.48333333333333\n",
      "K =  18 98.55000000000001\n",
      "K =  19 98.21666666666667\n",
      "K =  20 98.35000000000001\n",
      "K =  21 98.15\n",
      "K =  22 98.23333333333333\n",
      "K =  23 97.93333333333332\n",
      "K =  24 98.01666666666667\n",
      "K =  25 97.88333333333334\n",
      "K =  26 97.89999999999999\n",
      "K =  27 97.75\n",
      "K =  28 97.8\n",
      "K =  29 97.55\n",
      "K =  30 97.6\n",
      "K =  31 97.45\n",
      "K =  32 97.5\n",
      "K =  33 97.38333333333333\n",
      "K =  34 97.5\n",
      "K =  35 97.28333333333333\n",
      "K =  36 97.33333333333334\n",
      "K =  37 97.18333333333334\n",
      "K =  38 97.25\n",
      "K =  39 97.15\n",
      "K =  40 97.16666666666667\n",
      "K =  41 97.01666666666667\n",
      "K =  42 97.06666666666666\n",
      "K =  43 96.81666666666666\n",
      "K =  44 96.83333333333334\n",
      "K =  45 96.65\n",
      "K =  46 96.71666666666667\n",
      "K =  47 96.56666666666666\n",
      "K =  48 96.58333333333333\n",
      "K =  49 96.5\n",
      "K =  50 96.53333333333333\n",
      "K =  51 96.43333333333334\n",
      "K =  52 96.43333333333334\n",
      "K =  53 96.38333333333333\n",
      "K =  54 96.38333333333333\n",
      "K =  55 96.33333333333334\n",
      "K =  56 96.33333333333334\n",
      "K =  57 96.26666666666667\n",
      "K =  58 96.26666666666667\n",
      "K =  59 96.26666666666667\n",
      "K =  60 96.26666666666667\n",
      "K =  61 96.25\n",
      "K =  62 96.25\n",
      "K =  63 96.13333333333334\n",
      "K =  64 96.13333333333334\n",
      "K =  65 96.13333333333334\n",
      "K =  66 96.13333333333334\n",
      "K =  67 96.1\n",
      "K =  68 96.11666666666666\n",
      "K =  69 96.08333333333333\n",
      "K =  70 96.08333333333333\n",
      "K =  71 96.0\n",
      "K =  72 96.01666666666667\n",
      "K =  73 95.89999999999999\n",
      "K =  74 95.93333333333334\n",
      "K =  75 95.88333333333333\n",
      "K =  76 95.89999999999999\n",
      "K =  77 95.89999999999999\n",
      "K =  78 95.89999999999999\n",
      "K =  79 95.83333333333334\n",
      "K =  80 95.83333333333334\n",
      "K =  81 95.78333333333333\n",
      "K =  82 95.8\n",
      "K =  83 95.73333333333333\n",
      "K =  84 95.76666666666667\n",
      "K =  85 95.7\n",
      "K =  86 95.75\n",
      "K =  87 95.65\n",
      "K =  88 95.68333333333334\n",
      "K =  89 95.6\n",
      "K =  90 95.63333333333334\n",
      "K =  91 95.46666666666667\n",
      "K =  92 95.5\n",
      "K =  93 95.46666666666667\n",
      "K =  94 95.46666666666667\n",
      "K =  95 95.45\n",
      "K =  96 95.5\n",
      "K =  97 95.41666666666667\n",
      "K =  98 95.46666666666667\n",
      "K =  99 95.41666666666667\n",
      "K =  100 95.41666666666667\n",
      "95.90083333333328\n"
     ]
    }
   ],
   "source": [
    "sum = 0\n",
    "\n",
    "for x in range (1, 100):\n",
    "    neigh = KNeighborsClassifier(n_neighbors= x)\n",
    "    neigh.fit(X_train, y_train)\n",
    "    y_testing_prediction = neigh.predict(X_test)\n",
    "    x = x + 1\n",
    "    from sklearn.metrics import accuracy_score\n",
    "    sum = sum + accuracy_score(y_test, y_testing_prediction)*100\n",
    "    print(\"K = \",x,accuracy_score(y_test, y_testing_prediction)*100)\n",
    "    avg = sum/x\n",
    "print(avg)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "115ac76e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(18000, 6)"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "X_train, X_test, y_train, y_test = train_test_split(dataset[:,0:6], dataset[:,-1], test_size=0.4, random_state=42)\n",
    "X_train.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "13a567e0",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "K =  2 98.78333333333333\n",
      "K =  3 98.85000000000001\n",
      "K =  4 98.9\n",
      "K =  5 98.80833333333334\n",
      "K =  6 98.91666666666666\n",
      "K =  7 98.75833333333334\n",
      "K =  8 99.0\n",
      "K =  9 98.80833333333334\n",
      "K =  10 98.98333333333333\n",
      "K =  11 98.825\n",
      "K =  12 98.94166666666668\n",
      "K =  13 98.73333333333333\n",
      "K =  14 98.86666666666667\n",
      "K =  15 98.65833333333333\n",
      "K =  16 98.76666666666667\n",
      "K =  17 98.56666666666666\n",
      "K =  18 98.675\n",
      "K =  19 98.35833333333333\n",
      "K =  20 98.45\n",
      "K =  21 98.25833333333334\n",
      "K =  22 98.275\n",
      "K =  23 98.125\n",
      "K =  24 98.2\n",
      "K =  25 98.03333333333333\n",
      "K =  26 98.1\n",
      "K =  27 97.89999999999999\n",
      "K =  28 97.94166666666668\n",
      "K =  29 97.74166666666667\n",
      "K =  30 97.78333333333333\n",
      "K =  31 97.58333333333333\n",
      "K =  32 97.63333333333334\n",
      "K =  33 97.45833333333334\n",
      "K =  34 97.5\n",
      "K =  35 97.26666666666667\n",
      "K =  36 97.29166666666667\n",
      "K =  37 97.18333333333334\n",
      "K =  38 97.2\n",
      "K =  39 97.125\n",
      "K =  40 97.13333333333334\n",
      "K =  41 97.05\n",
      "K =  42 97.05833333333334\n",
      "K =  43 97.01666666666667\n",
      "K =  44 97.02499999999999\n",
      "K =  45 96.95\n",
      "K =  46 97.0\n",
      "K =  47 96.94166666666668\n",
      "K =  48 96.975\n",
      "K =  49 96.875\n",
      "K =  50 96.88333333333333\n",
      "K =  51 96.79166666666667\n",
      "K =  52 96.80833333333332\n",
      "K =  53 96.76666666666667\n",
      "K =  54 96.79166666666667\n",
      "K =  55 96.71666666666667\n",
      "K =  56 96.72500000000001\n",
      "K =  57 96.65833333333333\n",
      "K =  58 96.675\n",
      "K =  59 96.575\n",
      "K =  60 96.59166666666667\n",
      "K =  61 96.525\n",
      "K =  62 96.56666666666666\n",
      "K =  63 96.45833333333333\n",
      "K =  64 96.49166666666666\n",
      "K =  65 96.36666666666667\n",
      "K =  66 96.43333333333334\n",
      "K =  67 96.28333333333333\n",
      "K =  68 96.3\n",
      "K =  69 96.21666666666667\n",
      "K =  70 96.24166666666667\n",
      "K =  71 96.21666666666667\n",
      "K =  72 96.24166666666667\n",
      "K =  73 96.18333333333334\n",
      "K =  74 96.23333333333333\n",
      "K =  75 96.16666666666667\n",
      "K =  76 96.175\n",
      "K =  77 96.05\n",
      "K =  78 96.09166666666667\n",
      "K =  79 96.025\n",
      "K =  80 96.09166666666667\n",
      "K =  81 95.95833333333333\n",
      "K =  82 95.98333333333333\n",
      "K =  83 95.89999999999999\n",
      "K =  84 95.93333333333334\n",
      "K =  85 95.85833333333333\n",
      "K =  86 95.875\n",
      "K =  87 95.81666666666668\n",
      "K =  88 95.84166666666667\n",
      "K =  89 95.825\n",
      "K =  90 95.83333333333334\n",
      "K =  91 95.73333333333333\n",
      "K =  92 95.75833333333334\n",
      "K =  93 95.66666666666667\n",
      "K =  94 95.69166666666666\n",
      "K =  95 95.6\n",
      "K =  96 95.60833333333333\n",
      "K =  97 95.59166666666667\n",
      "K =  98 95.6\n",
      "K =  99 95.575\n",
      "K =  100 95.60833333333333\n",
      "96.07875000000006\n"
     ]
    }
   ],
   "source": [
    "sum = 0\n",
    "\n",
    "for x in range (1, 100):\n",
    "    neigh = KNeighborsClassifier(n_neighbors= x)\n",
    "    neigh.fit(X_train, y_train)\n",
    "    y_testing_prediction = neigh.predict(X_test)\n",
    "    x = x + 1\n",
    "    from sklearn.metrics import accuracy_score\n",
    "    sum = sum + accuracy_score(y_test, y_testing_prediction)*100\n",
    "    print(\"K = \",x,accuracy_score(y_test, y_testing_prediction)*100)\n",
    "    avg = sum/x\n",
    "print(avg)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "id": "6e8afabf",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(21000, 6)"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "X_train, X_test, y_train, y_test = train_test_split(dataset[:,0:6], dataset[:,-1], test_size=0.3, random_state=42)\n",
    "X_train.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "id": "a4781616",
   "metadata": {},
   "outputs": [],
   "source": [
    "#Import svm model\n",
    "from sklearn import svm\n",
    "\n",
    "#Create a svm Classifier\n",
    "clf = svm.SVC() # Linear Kernel\n",
    "\n",
    "#Train the model using the training sets\n",
    "clf.fit(X_train, y_train)\n",
    "\n",
    "#Predict the response for test dataset\n",
    "y_pred = clf.predict(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "id": "4f1c5398",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([1., 0., 0., ..., 0., 0., 0.])"
      ]
     },
     "execution_count": 59,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_pred"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "id": "2b5e2abc",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy: 0.9341111111111111\n"
     ]
    }
   ],
   "source": [
    "#Import scikit-learn metrics module for accuracy calculation\n",
    "from sklearn import metrics\n",
    "\n",
    "# Model Accuracy: how often is the classifier correct?\n",
    "print(\"Accuracy:\",metrics.accuracy_score(y_test, y_pred))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "id": "b92bd26f",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Recall: 0.3333333333333333\n"
     ]
    }
   ],
   "source": [
    "# Model Recall: what percentage of positive tuples are labelled as such?\n",
    "print(\"Recall:\",metrics.recall_score(y_test, y_pred))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "id": "4c39ac3d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "              precision    recall  f1-score   support\n",
      "\n",
      "         0.0       0.93      1.00      0.96      8133\n",
      "         1.0       0.95      0.33      0.49       867\n",
      "\n",
      "    accuracy                           0.93      9000\n",
      "   macro avg       0.94      0.67      0.73      9000\n",
      "weighted avg       0.94      0.93      0.92      9000\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(classification_report(y_test, y_pred))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ebec7a25",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
